********************************************************************************
* This do file prepares data for firm-product level DD analysis (STEP 2)
* Last modified by: Yk Wang
* Date: 09/14/2018
********************************************************************************

clear all
set more off
set scheme s1color

***Set directory
capture cd "/Users/Dropbox/Chinese food exports"


***locals
local eventyear=2009

********************************
* Prepare main dataset: STEP 2 *
********************************

forvalues i=0/1{

use "Data/Coded data/Customs/customs_yearly_2000_2013_firmproduct_level_dairy_step1",replace
keep if dairy==1

	if `i'==1{
	
			drop if ban_country==1
		
              }
			  
	g price=value/quantity

	bys party_id hs_id year: egen sum_value=total(value)
	bys party_id hs_id year: egen sum_quantity=total(quantity)
	bys party_id hs_id year:egen avg_price=mean(price)   //gen price($/kg)

	bys party_id hs_id year: egen sum_oecd_value=sum(value) if oecdhighincome==1
	bys party_id hs_id year: egen sum_oecd_quantity=sum(quantity) if oecdhighincome==1
	bys party_id hs_id year: egen avg_oecd_price=mean(price) if oecdhighincome==1
	
	***Generate affected locations (sourcing location)
    ***prov
	g sourceprov=substr(city_id,1,2)
	g sourcecity=substr(city_id,1,4)

	preserve
	keep if year<=2007
	keep sourceprov affected_firm_products
	keep if affected_firm_products==1 
	duplicates drop sourceprov,force
	keep sourceprov

	tempfile affected_sourceprov
	save `affected_sourceprov',replace
	restore
	
	
	preserve
	keep if year<=2007
	keep sourcecity affected_firm_products
	keep if affected_firm_products==1 
	duplicates drop sourcecity,force
	keep sourcecity

	tempfile affected_sourcecity
	save `affected_sourcecity',replace
	restore
	
	
	preserve
	keep if year<=2007
	keep sourceprov hs_id affected_firm_products
	keep if affected_firm_products==1 
	duplicates drop sourceprov hs_id,force
	keep sourceprov hs_id

	tempfile affected_sourceprov_product
	save `affected_sourceprov_product',replace
	restore
	
	
	preserve
	keep if year<=2007
	keep sourcecity hs_id affected_firm_products
	keep if affected_firm_products==1 
	duplicates drop sourcecity hs_id,force
	keep sourcecity hs_id

	tempfile affected_sourcecity_product
	save `affected_sourcecity_product',replace
	restore
	
	
	preserve
	keep if year<=2007
	merge m:1 sourceprov using `affected_sourceprov'
	drop if _m==2
	
	gen affected_sourceprov=(_m==3)
	drop _m
	collapse (sum) value, by(party_id affected_sourceprov)
	bys party_id: gen N=_N
	bys party_id: egen sum_value=sum(value)
	keep if (N==2 & affected_sourceprov==1) | N==1
	replace value=0 if affected_sourceprov==0
	gen affected_sourceprov_vs=value/sum_value

	keep party_id affected_sourceprov affected_sourceprov_vs

	tempfile affected_sourceprov_firm
	save `affected_sourceprov_firm',replace	
	restore
	
	
	preserve
	keep if year<=2007
	merge m:1 sourcecity using `affected_sourcecity'
	drop if _m==2
	
	gen affected_sourcecity=(_m==3)
	drop _m
	collapse (sum) value, by(party_id affected_sourcecity)
	bys party_id: gen N=_N
	bys party_id: egen sum_value=sum(value)
	keep if (N==2 & affected_sourcecity==1) | N==1
	replace value=0 if affected_sourcecity==0
	gen affected_sourcecity_vs=value/sum_value

	keep party_id affected_sourcecity affected_sourcecity_vs

	tempfile affected_sourcecity_firm
	save `affected_sourcecity_firm',replace	
	restore
	
	
	preserve
	keep if year<=2007
	merge m:1 sourceprov hs_id using `affected_sourceprov_product'
	drop if _m==2
	
	gen affected_sourceprov_products=(_m==3)
	drop _m
	collapse (sum) value, by(party_id affected_sourceprov_products)
	bys party_id: gen N=_N
	bys party_id: egen sum_value=sum(value)
	keep if (N==2 & affected_sourceprov_products==1) | N==1
	replace value=0 if affected_sourceprov_products==0
	gen affected_sourceprov_prod_vs=value/sum_value
	
	keep party_id affected_sourceprov_products affected_sourceprov_prod_vs

	tempfile affected_sourceprov_firmprod
	save `affected_sourceprov_firmprod',replace	
	restore
	
	
	preserve
	keep if year<=2007
	merge m:1 sourcecity hs_id using `affected_sourcecity_product'
	drop if _m==2
	
	gen affected_sourcecity_products=(_m==3)
	drop _m
	collapse (sum) value, by(party_id affected_sourcecity_products)
	bys party_id: gen N=_N
	bys party_id: egen sum_value=sum(value)
	keep if (N==2 & affected_sourcecity_products==1) | N==1
	replace value=0 if affected_sourcecity_products==0
	gen affected_sourcecity_prod_vs=value/sum_value
	
	keep party_id affected_sourcecity_products affected_sourcecity_prod_vs

	tempfile affected_sourcecity_firmprod
	save `affected_sourcecity_firmprod',replace	
	restore
	
		
	***Generate affected locations (firm location)
    ***prov
	preserve
	keep firmprov hs_id affected_firm_products
	keep if affected_firm_products==1 
	duplicates drop firmprov hs_id,force

	tempfile affected_firmloc_products
	save `affected_firmloc_products',replace
	restore


	preserve
	keep firmprov affected_firms
	keep if affected_firm==1 
	duplicates drop firmprov,force

	tempfile affected_firmloc
	save `affected_firmloc',replace
	restore


	merge m:1 firmprov using `affected_firmloc'
	tab _m
	g affected_firmloc=0
	replace affected_firmloc=1 if _m==3
	drop _m

	merge m:1 firmprov hs_id using `affected_firmloc_products'
	tab _m
	g affected_firmloc_products=0
	replace affected_firmloc_products=1 if _m==3
	drop _m
		

	collapse (mean) sum_value sum_quantity sum_oecd_value sum_oecd_quantity avg_price avg_oecd_price ///
		(firstnm) hs2digit food dairy affected_firms affected_products affected_firm_products ///
		innocent_firms innocent_firm_products noninsp_firms ///
		noninsp_firm_products interm driving_firms type_id2 firm_sales_bl ///
		firm_sourceloc firm_exp_bl firmprod_sales_bl firmprod_sourceloc firmprod_exp_bl ///
		affected_firmloc affected_firmloc_products hs4avg_sales_bl hs2avg_sales_bl intensity_aproducts ///
		intensity_afirms ban_country round1 round2 round3 news, by (party_id hs_id hs6 year)
		
	
	foreach v of varlist sum_oecd_value sum_oecd_quantity{

		replace `v'=0 if `v'==.
		
		}
	
	
	ren sum_quantity quantity
	ren sum_value value
	ren avg_price price
	ren sum_oecd_quantity oecd_quantity
	ren sum_oecd_value oecd_value
	ren avg_oecd_price oecd_price

	gen oecd_value_share=oecd_value/value   //oecd value share
	 
	***Take a random sample
	
	preserve
	keep party_id hs2digit food dairy
	collapse (max) food dairy,by(party_id hs2digit)
	bys hs2digit: g a=runiform()
	replace a=0 if food==1|dairy==1	//KEEP ALL FOOD AND DAIRY FIRMS
	collapse (min) a,by(party_id)
	
	tempfile insample
	save `insample',replace
	restore 

	merge m:1 party_id using `insample'
	drop _m

	bys party_id: egen b=mean(affected_products)
	replace a=0 if b>0|affected_firms==1 //KEEP all affected firms
	keep if a<0.01
	drop a b

	egen id=group(party_id hs_id)
	
	***Fill in firmproduct and year to capture extensive margin 
	
	preserve

	keep id party_id hs_id hs6 food dairy interm affected_firms affected_products affected_firm_products ///
		type_id2  driving_firms firm_sales_bl firm_sourceloc firm_exp_bl firmprod_sales_bl firmprod_sourceloc ///
		firmprod_exp_bl hs4avg_sales_bl hs2avg_sales_bl innocent_firms innocent_firm_products affected_firmloc ///
		affected_firmloc_products intensity_aproducts intensity_afirms noninsp_firms ///
		noninsp_firm_products round1 round2 round3 news ///
		
	duplicates drop id,force
	sort id

	tempfile fillinvars1
	save `fillinvars1',replace

	restore
		
		
	fillin id year

	merge m:1 id using `fillinvars1', update
	
	drop if _m==2
	drop _m
	
	foreach v of varlist value quantity oecd_value oecd_quantity {

		replace `v'=0 if _fillin==1
		
		}
		

	ren _fillin firmprodyear_fillin	
	drop id
	
	merge m:1 party_id using `affected_sourceprov_firm'
	drop if _m==2
	drop _m
	
	merge m:1 party_id using `affected_sourcecity_firm'
	drop if _m==2
	drop _m
	
	merge m:1 party_id using `affected_sourceprov_firmprod'
	drop if _m==2
	drop _m
	
	merge m:1 party_id using `affected_sourcecity_firmprod'
	drop if _m==2
	drop _m

	***Generate variables for regression analysis

	gen lvalue=log(value+0.1)
	gen lquantity=log(quantity+0.1)
	gen lprice=log(price)
	gen lvalue_oecd=log(oecd_value+0.1)
	gen lquantity_oecd=log(oecd_quantity+0.1)
	gen lprice_oecd=log(oecd_price)
	
	gen ihsvalue=log(value+(value^2+1)^0.5)
	gen ihsquantity=log(quantity+(quantity^2+1)^0.5)
	gen ihsvalue_oecd=log(oecd_value+(oecd_value^2+1)^0.5)
	gen ihsquantity_oecd=log(oecd_quantity+(oecd_quantity^2+1)^0.5)

	g entry=1-firmprodyear_fillin
	g entry_oecd=(oecd_value>0)

	gen post=(year>=2009)
	gen sourceprovXproductsXpost=affected_sourceprov_products*post
	gen sourceprovXpost=affected_sourceprov*post
	gen sourceprovXproducts_vsXpost=affected_sourceprov_prod_vs*post
	gen sourceprov_vsXpost=affected_sourceprov_vs*post
	
	gen sourcecityXproductsXpost=affected_sourcecity_products*post
	gen sourcecityXpost=affected_sourcecity*post
	gen sourcecityXproducts_vsXpost=affected_sourcecity_prod_vs*post
	gen sourcecity_vsXpost=affected_sourcecity_vs*post
	
	gen firmlocXproductsXpost=affected_firmloc_products*post
	gen firmlocXpost=affected_firmloc*post
	
	gen firmXproductsXpost=affected_firm_products*post
	gen firmXpost=affected_firms*post
	gen productsXpost=affected_products*post
	gen ifirmXproductsXpost=innocent_firm_products*post
	gen ifirmXpost=innocent_firms*post
	gen intensityfirmXpost=intensity_afirms*post
	gen intensityproductsXpost=intensity_aproducts*post
	gen dairyXpost=dairy*post
	gen foodXpost=food*post

	tab year,g(year)
	foreach v of var year*{
		g firmX`v'=affected_firms*`v'
		g productsX`v'=affected_products*`v'
		g firmXproductsX`v'=affected_firm_products*`v'
		g ifirmX`v'=innocent_firms*`v'
		g ifirmXproductsX`v'=innocent_firm_products*`v'
		g intensityfirmX`v'=intensity_afirms*`v'
		g intensityproductsX`v'=intensity_aproducts*`v'
		}

	replace value=value/10^6  //replace value into million dollars


	***Generate FEs
	
	egen party=group(party_id)
	
	egen product_firm=group(party hs_id)
	egen product_year=group(hs_id year)
	
	***Define interaction variables for regressions
	
	g largesize_hs4=(firm_sales_bl>hs4avg_sales_bl)
	g largesize_hs2=(firm_sales_bl>hs2avg_sales_bl)
	g experienced=(firm_exp_bl>1)

	ren type_id2 ownership
	
	
	
	***Generate Firm-specific value shares to different destinations
	preserve
	use "Data/bilateral_trade/dta/comtrade_dairy_total_exclchn_coded.dta", clear 
	duplicates drop origin_id, force
	keep origin_id
	tempfile origin_list
	save `origin_list', replace
	restore
	
	preserve
	use "Data/Coded data/Customs/customs_yearly_2000_2013_firmproduct_level_dairy_step1", clear
	keep if year<=2007
	merge m:1 origin_id using `origin_list'
	keep if _m==3
	drop _m
	keep party_id origin_id value
	collapse (sum) value, by(party_id origin_id)
	bys party_id: egen totalvalue=total(value)
	gen value_perc=value/totalvalue
	keep party_id origin_id value_perc
	expand 14
	egen id=group(party_id origin_id)
	bys id : gen year = 2000 + _n-1
	drop id
	tempfile value_perc
	save `value_perc'
	
	duplicates drop party_id, force
	keep party_id
	tempfile party07
	save `party07', replace
	restore
	
	preserve
	use `value_perc', clear
	merge m:1 origin_id year using "Data/bilateral_trade/dta/comtrade_dairy_total_exclchn_coded.dta"
	gen dairyimportXvalueshare=dairy_import_value*value_perc
	drop if dairyimportXvalueshare<0
	collapse (sum) dairyimportXvalueshare, by(party_id year)
	ren dairyimportXvalueshare firmspecdemandshock
	keep party_id year firmspecdemandshock
    drop if party_id==""
	tempfile firm_shock
	save `firm_shock', replace
	restore

	merge m:1 party_id using `party07'
	gen nonexp07=(_m==1)
	drop _m
	
	merge m:1 party_id year using `firm_shock'
	drop _m
	
	***rename variable label
	label var party_id "FirmID"
	label var hs_id "HS8digit"
	label var hs2digit "HS2digit"
	label var food "Food sector (dummy)"
	label var dairy "Dairy sector (dummy)"
	
	label var year "Year"
	label var entry "Exports > 0 (dummy)"
	label var entry_oecd "Exports to OECD > 0 (dummy)"
	label var value "annual value (in million dollars)"
	label var lvalue "log value"
	label var ihsvalue "IHS value"
	label var quantity "annual quantity"
	label var lquantity "log quantity"
	label var ihsquantity "IHS quantity"
	label var price "annual average price"
	label var lprice "log price"
	label var oecd_value "value exported to oecd&high income countries"
	label var lvalue_oecd "log value exported to oecd&high income countries"
	label var ihsvalue_oecd "IHS value exported to oecd&high income countries"
	label var oecd_quantity "quantity exported to oecd&high income countries"
	label var lquantity_oecd "log quantity exported to oecd&high income countries"
	label var ihsquantity_oecd "IHS quantity exported to oecd&high income countries"
	label var oecd_price "annual average price to oecd countries"
	label var lprice_oecd "log annual average price to oecd countries"
	label var oecd_value_share "value share exported to oecd&high income countries"
	label var nonexp07 "firms that never exported prior to 2008"

	label var affected_firms "Contaminated firms"
	label var intensity_afirms "Intensity of affected firms"
	label var affected_products "Contaminated products"
    label var intensity_aproducts "Intensity of affected products"
	label var affected_firm_products "Contaminated firm-product pairs"
	label var innocent_firms "Innocent firms"
	label var innocent_firm_products "Innocent firm-product pairs"
	label var interm "Intermediaries"
	label var driving_firms "Firms contributed to over 1% of the spike in 06-07"
	label var round1 "1st round inspected firms"
	label var round2 "2nd round inspected firms"
	label var round3 "3rd round inspected firms"
	label var news "firms that are found contaminated by foreign governments and released by news"
	
	label var post "Post (2009-2013)"
	label var firmXproductsXpost "CFirm-ProductXPost"
	label var firmXpost "CFirmXPost"
	label var ifirmXproductsXpost "IFirm-ProductXPost"
	label var ifirmXpost "IFirmXPost"
	label var intensityfirmXpost "Intensity of CFirmXPost"
	label var intensityproductsXpost "Intensity of CProductXPost"
	label var productsXpost "CProductXPost"
	label var dairyXpost "DairyXPost"
	label var foodXpost "FoodXPost"
	label var firmlocXproductsXpost "Locations(firms)ProductsXPost"
	label var firmlocXpost "Locations(firms)XPost"
	label var sourceprovXproductsXpost "CSourceProv-ProductXPost"
	label var sourceprovXpost "CSourceProvXPost"
    label var sourcecityXproductsXpost "CSourceCity-ProductXPost"
	label var sourcecityXpost "CSourceCityXPost"
	
	label var sourceprovXproducts_vsXpost "Intensity of CSourceProv-ProductXPost"
	label var sourceprov_vsXpost "Intensity of CSourceProvXPost"
	label var sourcecityXproducts_vsXpost "Intensity of CSourceCity-ProductXPost"
	label var sourcecity_vsXpost "Intensity of CSourceCityXPost"
	
	label var firmspecdemandshock "Firmspecdemandshock"
	
	label var largesize_hs4 "Baseline exports volumn greater than the median of the same HS4 industry"
	label var largesize_hs2 "Baseline exports volumn greater than the median of the same HS2 industry"
	label var experienced "Exported for more than 1 year prior to 2008"
	label var ownership "1=state owned; 2=private; 3=joint venture; 4=foreign"
	label var firm_sales_bl "Firm's baseline total exports volumn"
	label var firm_sourceloc "Firm's baseline major sourcing province"
	label var firm_exp_bl "Firm's baseline number of exporting years"
	label var firmprod_sales_bl "Firm's baseline total exports volumn for a given product (HS8)"
	label var firmprod_sourceloc "Firm's baseline major sourcing province for a given product (HS8)"
	label var firmprod_exp_bl "Firm's baseline number of exporting years for a given product (HS8)"
	label var hs4avg_sales_bl "Median firm's baseline exports volumn for a given HS4 industry"
	label var hs2avg_sales_bl "Median firm's baseline exports volumn for a given HS2 industry"
	
	compress


	save "Data/Coded data/Customs/customs_yearly_2000_2013_firmproduct_level_dairy_dropban`i'.dta", replace
	}
	
	
*** Generate info accuarcy related outcome variables

    use "Data/Coded data/Customs/customs_yearly_2000_2013_firmproduct_level_dairy_step1",replace
	
    keep if dairy==1
			  

	g price=value/quantity
	
    foreach var of varlist gwebindex gnewsindex {
	

        preserve
		
		replace `var'=-1 if `var'==.
	
	    collapse (sum) value quantity (mean) price, by (party_id hs_id year `var')
		drop if `var'<0
		
        ren value `var'_value
        ren quantity `var'_quantity
        ren price `var'_price
        
        reshape wide `var'_value `var'_quantity `var'_price, i(party_id hs_id year) j(`var')
	    
	    tempfile `var'_firmprodyear
	    save ``var'_firmprodyear', replace
	    
	     restore

    }
	

    collapse (sum) value, by(party_id hs_id year)

	
	merge 1:1 party_id hs_id year using `gwebindex_firmprodyear'
	drop _m
	
	merge 1:1 party_id hs_id year using `gnewsindex_firmprodyear'
	drop _m

	
	
	foreach v of varlist gwebindex_value* gwebindex_quantity* gnewsindex_value* gnewsindex_quantity* {

		replace `v'=0 if `v'==.
		
		}
		
		
	foreach v of varlist gwebindex_value* gnewsindex_value* {
	
	    gen `v'_share=`v'/value   //value share to countries with different level of gtrends ratio
	
	}
	
	egen id=group(party_id hs_id)
	
	
	
	preserve

	keep id party_id hs_id
	
	duplicates drop id,force
	sort id

	tempfile fillinids
	save `fillinids',replace

	restore
		
		
	fillin id year

	merge m:1 id using `fillinids', update
	
	drop if _m==2
	drop _m

	
	foreach v of varlist value gwebindex_value* gwebindex_quantity* gnewsindex_value* gnewsindex_quantity* ///
	                     {

		replace `v'=0 if _fillin==1
		
		gen ihs`v'=log(`v'+(`v'^2+1)^0.5)
		gen l`v'=log(`v'+0.1)
		
		}

	ren _fillin firmprodyear_fillin	
	drop id
	
	foreach v of varlist gwebindex_price* gnewsindex_price* {
	
	    gen l`v'=log(`v')
		
	}
	
	g entry=1-firmprodyear_fillin
	
	foreach v of varlist gwebindex_value* gnewsindex_value* {
	
	    g entry_`v'=(`v'>0)
		
		}
	
	drop value
	
	merge 1:1 party_id hs_id year using "Data/Coded data/Customs/customs_yearly_2000_2013_firmproduct_level_dairy_dropban0.dta"
	
	tab _m
	drop _m
	
	compress

	save "Data/Coded data/Customs/customs_yearly_2000_2013_firmproduct_level_dairy_dropban0.dta", replace



